import os
import sys
import time


def write_mtd(out_file_name, row, col, nnz):
    """Write a JSON metadata (.mtd) file describing a text-format matrix.

    The generated metadata is also echoed to stdout.

    Args:
        out_file_name: Path of the metadata file to create (overwritten).
        row: Row count recorded under "rows".
        col: Column count recorded under "cols".
        nnz: Non-zero count recorded under "nnz".
    """
    # The timestamp is labelled "UTC", so it has to come from gmtime();
    # localtime() would mislabel it on any host not running in UTC.
    created = time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime())
    fields = [
        '"data_type": "matrix",',
        '"value_type": "double",',
        '"rows": ' + str(row) + ' ,',
        '"cols": ' + str(col) + ',',
        '"nnz": ' + str(nnz) + ',',
        '"format": "text",',
        '"author": "SystemML",',
        '"created": "' + created + '"',
    ]
    # Each field sits on its own line, indented by eight spaces.
    mtd = "{\n" + "".join("        " + field + "\n" for field in fields) + "}"
    print(out_file_name + ":\n" + mtd)
    # The context manager closes the file even if the write fails.
    with open(out_file_name, "w") as fp:
        fp.write(mtd)


def libsvm2sysml(in_file_name, out_file_name, row, col):
    """Convert a LIBSVM-style text file into two text-format matrix files.

    Each non-blank input line is read as ``label idx:val idx:val ...``.
    Features are written to ``<out_file_name>-a.txt`` as ``row idx val``
    lines and non-zero labels to ``<out_file_name>-b.txt`` as ``row 1 label``
    lines. A ``.mtd`` metadata file is then written next to each output.

    Args:
        in_file_name: Path of the input file.
        out_file_name: Prefix used for every generated file.
        row: Maximum number of samples to convert; also the row count
            recorded in both metadata files.
        col: Column count recorded in the feature matrix metadata.

    Raises:
        ValueError: If the first token of a sample is not a valid float.
    """
    annz = 0
    bnnz = 0
    line_num = 0
    # Context managers close all three files even when parsing fails midway.
    with open(in_file_name, "r") as fin, \
            open(out_file_name + "-a.txt", "w") as a_txt_out, \
            open(out_file_name + "-b.txt", "w") as b_txt_out:
        for line in fin:
            # split() with no argument handles tabs, repeated spaces and
            # CRLF line endings, none of which split(" ") copes with.
            tokens = line.split()
            if not tokens:
                # Blank lines (e.g. a trailing empty line) carry no sample
                # and must not crash the label parse or consume a row index.
                continue
            line_num += 1
            if line_num > row:
                break
            if float(tokens[0]) != 0.0:
                bnnz += 1
                b_txt_out.write(str(line_num) + " 1 " + tokens[0] + "\n")
            for token in tokens[1:]:
                arr = token.split(":")
                # Anything that is not exactly "index:value" is ignored.
                if len(arr) == 2:
                    a_txt_out.write(
                        str(line_num) + " " + arr[0] + " " + arr[1] + "\n")
                    annz += 1
            if line_num % 1000 == 0:
                print("lines:" + str(line_num))
    write_mtd(out_file_name + "-a.txt.mtd", row, col, annz)
    # Labels are always written in column 1, so the label matrix is row x 1,
    # not row x col.
    write_mtd(out_file_name + "-b.txt.mtd", row, 1, bnnz)


def write_dml(out_file_name, hdfs_root="hdfs://10.11.1.209:9000/data",
              dml_path='./txt2bin.dml'):
    """Generate the DML script that converts the text matrices to binary.

    The script reads ``<name>-a.txt`` and ``<name>-b.txt`` from
    ``<hdfs_root>/<name>/`` in text format and writes them back to the same
    directory as ``<name>-a.bin`` and ``<name>-b.bin`` in binary format.

    Args:
        out_file_name: Dataset name; used both as the HDFS sub-directory and
            as the file-name prefix.
        hdfs_root: HDFS URL of the parent data directory (no trailing slash).
            The default reproduces the previously hard-coded location.
        dml_path: Where to write the generated script. The default is the
            previously hard-coded ``./txt2bin.dml``.
    """
    prefix = hdfs_root + '/' + out_file_name + '/' + out_file_name
    statements = [
        'a = read("' + prefix + '-a.txt",format="text")',
        'b = read("' + prefix + '-b.txt",format="text")',
        'write(a,"' + prefix + '-a.bin",format="binary")',
        'write(b,"' + prefix + '-b.bin",format="binary")',
    ]
    # No trailing newline, matching the script produced before.
    with open(dml_path, 'w') as fp:
        fp.write("\n".join(statements))


def calls(out_file_name):
    """Upload the generated files to HDFS and run the text-to-binary job.

    Runs, in order: ``hdfs dfs -mkdir`` for ``/data/<name>``, ``hdfs dfs -put``
    of every local ``<name>-*`` file into it, a directory listing, a
    ``spark-submit`` of SystemML with ``./txt2bin.dml``, and a final listing.
    Exit statuses are not checked; each command runs regardless of the
    outcome of the previous one.

    Args:
        out_file_name: Dataset name; used as the local file prefix and as
            the HDFS sub-directory under ``/data``.
    """
    import shlex

    # Quote the name so spaces or shell metacharacters cannot split or inject
    # commands. Plain names pass through unchanged, so the command lines are
    # identical to the unquoted form for them.
    name = shlex.quote(out_file_name)
    hdfs = "~/hadoop-2.9.2/bin/hdfs dfs "
    target_dir = "/data/" + name

    os.system(hdfs + "-mkdir " + target_dir)
    # "-*" stays outside the quotes so the shell still expands the glob.
    os.system(hdfs + "-put " + name + "-* " + target_dir + "/")
    os.system(hdfs + "-ls " + target_dir + "/")
    os.system("~/spark-2.4.5/bin/spark-submit --master spark://10.11.1.209:7077 "
              "--executor-memory 32G "
              "--driver-memory 16G ~/systemml-1.2.0/SystemML.jar -f ./txt2bin.dml")
    os.system(hdfs + "-ls " + target_dir + "/")


if __name__ == '__main__':
    # Expected arguments: <in_file> <out_prefix> <rows> <cols>
    try:
        in_file_name = sys.argv[1]
        out_file_name = sys.argv[2]
        row = int(sys.argv[3])
        col = int(sys.argv[4])
    except (IndexError, ValueError):
        # IndexError: too few arguments; ValueError: rows/cols not integers.
        # Catching only these keeps Ctrl-C and other unexpected errors visible.
        sys.exit("ERROR: wrong parameters")
    libsvm2sysml(in_file_name, out_file_name, row, col)
    write_dml(out_file_name)
    calls(out_file_name)
